## Who's Cheating on Your Survey? --------------------------------
## A Detection Approach with Digital Trace Data -----------------
## Reproduction materials (Appendix) ----------------------------

# Fix the state of R's random number generator before any model is fitted
set.seed(1234)

# load packages
# NOTE(review): no package is attached in this script itself, so the
# unqualified calls further down (ggplot(), stan_glmer(), %>%, ...) presumably
# rely on this file attaching them -- confirm.
source("code/packages.r")

# load permute model helper
# NOTE(review): presumably defines model_permute() and data_from_permute(),
# which are called in the Figure C8 section but not defined here -- confirm.
source("code/func_model_permute.R")

# load data
# load() restores the objects stored in each file under their saved names;
# the object names are assumed to match the file names (e.g. ger_respondent,
# subjects_long_df), since those are the names used below -- confirm.
load("data/ger_question.RData")
load("data/ger_respondent.RData")
load("data/ger_dat_response.RData")
load("data/subjects_long_df.RData")
load("data/ger_time_model.RData")
load("data/domains_user_df.RData")

## Setting up log
# From here on both console output and messages (warnings/errors included)
# are redirected to the same log file.
my_log <- file("02-appendix-log.txt") # File name of output log
sink(my_log, append = TRUE, type = "output") # Writing console output to log file
sink(my_log, append = TRUE, type = "message")

# Writing currently opened R script to file.
# rstudioapi only works inside a running RStudio session; without the guard
# the script would abort here (with both sinks still active) when run via
# Rscript or from another front end. The path check covers an editor tab that
# has not been saved to disk yet.
if (rstudioapi::isAvailable()) {
  script_path <- rstudioapi::getSourceEditorContext()$path
  if (is.character(script_path) && length(script_path) == 1L &&
      nzchar(script_path) && file.exists(script_path)) {
    cat(readChar(script_path, file.info(script_path)$size))
  }
}

## -------------------------------------------------------------
## Figure C2. Distribution of items cheated on for cheaters ----
## -------------------------------------------------------------
# Histogram of resp_prop_cheat for respondents with resp_cheated == 1, with the
# y-axis expressed as the share of those respondents per bin, plus a curved
# arrow and a text annotation placed at fixed plot coordinates.
ger_distribution_plot <- 
  ger_respondent %>% 
  dplyr::filter(resp_cheated == 1) %>%
  ggplot(., aes(x=resp_prop_cheat)) +
  geom_histogram(aes(y = (..count..)/sum(..count..)), bins = 25) +
  scale_x_continuous(limits = c(0, 1)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  theme_minimal(base_family = "Helvetica") +
  labs(y = "Percentage of cheaters\n",
       x = "Proportion of items cheated on") +
  geom_curve(data = data.frame(x = 0.3, y = 0.35, xend = 0.2, yend = 0.1),
             mapping = aes(x = x, y = y, xend = xend, yend = yend),
             size = 0.5, angle = 52L, colour = "black", curvature = 0.5,
             # Build the arrow with the grid constructors instead of a
             # hand-assembled structure(): same settings as before (30 degree
             # head, 0.1 inch long, drawn at the end point, closed head) but
             # independent of grid's internal unit representation.
             arrow = grid::arrow(angle = 30,
                                 length = grid::unit(0.1, "inches"),
                                 ends = "last",
                                 type = "closed"),
             inherit.aes = FALSE, show.legend = FALSE) + 
  geom_text(data = data.frame(x = 0.58, y = 0.36,
                              label = "About 92% of the cheaters consulted outside sources\nfor less than 25% of the items they encountered"),
            mapping = aes(x = x, y = y, label = label),
            size = 3.4, angle = 0L, lineheight = 1L,  hjust = 0.5,
            vjust = 0.5, colour = "black", family = "Helvetica", fontface = "plain",
            inherit.aes = FALSE, show.legend = FALSE)

ger_distribution_plot

ggsave("figures/fig-c2-ger_cheating_distribution.png", ger_distribution_plot, width = 18, height = 9, units = "cm")


## -------------------------------------------------------------
## Figure C3. Web-tracking availability respondents char -------
## -------------------------------------------------------------

# setting up and running stan_glmer
# Leave two cores free, but never ask for fewer than one: detectCores() can
# return 1, 2 or NA, which previously produced mc.cores of -1, 0 or NA.
options(mc.cores = max(1L, parallel::detectCores() - 2L, na.rm = TRUE))

# Binomial mixed model for was_matched with random intercepts for respondent
# (personid) and wave.
had_hit_stan_model <- stan_glmer(
  was_matched ~ female + educ_cat + age.2 + polinterest.1 + internal_efficacy +
    prop_correct + (1|personid) + (1|wave),
  data = subjects_long_df, family = binomial, sparse = TRUE,
  chains = 2, seed = 1234
)

summary(had_hit_stan_model)

#save(had_hit_stan_model, file = "data/analysis/had_hit_stan_model.RData")

## -------------------------------------------------------------
## The model will take some time to run. If you prefer to load ---
## previously saved results instead, "uncomment" the load() call -
## below -------------------------------------------------------

#load("data/analysis/had_hit_stan_model.RData") 

# coefficient plot
# Labels are attached by position, so label_list has to follow the order of
# the non-intercept rows returned by tidy() (presumably the order of the
# predictors in the model formula -- verify when changing the model).
label_list <- c("Gender: Female", "Education: Intermediate", "Education: High",
                "Age", "Political interest","Internal efficacy", "Political Knowledge\n(Composite)")

# Interval bounds at conf.level = 0.8, computed once and reused for both the
# lower and the upper bound.
ci_model_80 <- broom::tidy(had_hit_stan_model, conf.int = TRUE, conf.level = 0.8) %>%
  dplyr::filter(term != "(Intercept)")

# Estimates with the default-level interval (conf.low / conf.high) plus the
# 80% interval (conf.low.8 / conf.high.8) and the display label.
ci_model <- broom::tidy(had_hit_stan_model, conf.int = TRUE) %>% 
  dplyr::filter(term != "(Intercept)") %>%
  dplyr::mutate(conf.low.8 = ci_model_80$conf.low,
                conf.high.8 = ci_model_80$conf.high,
                label = factor(label_list, levels = rev(label_list))) 

# Figure C3: point estimates with a thin segment for conf.low/conf.high and a
# thick segment for the 80% bounds, written to a PDF device.
pdf(file="figures/fig-c3-respondent_was_matched_stan_ci.pdf", height=3.5, width=5, family="Helvetica")
# print() explicitly: a bare ggplot expression is only drawn by top-level
# auto-printing, which does not happen when this script is run via source(),
# leaving an empty PDF.
print(
  ggplot(ci_model, aes(x = estimate, y= label)) +
    geom_point(size = 2) +
    geom_text(aes(label = round(estimate,2)), position = position_nudge(y = 0.3), size = 3) +
    geom_vline(aes(xintercept = 0), linetype = "longdash", color = "black", size = 0.2) +
    geom_segment(aes(y = label, yend = label, x = conf.low, xend = conf.high), size = 0.4) +
    geom_segment(aes(y = label, yend = label, x = conf.low.8, xend = conf.high.8), size = 0.8) +
    theme_sjplot() +
    theme(legend.position = "none") +
    ggtitle("") +  
    labs(y ="",
         x= "Log odds") + 
    theme(plot.margin = unit(c(0,0.2,0,0), "cm")) #t,r,b,l
)
dev.off()


## -------------------------------------------------------------
## Figure C4. Respondent characteristics on cheating -----------
## -------------------------------------------------------------

# Panel a. Cheated (yes/no)
# NOTE(review): no `family` is passed, so stan_glm() falls back to its default
# family, while the panel built from this model labels its x-axis "Log odds".
# Confirm which of the two is intended before changing either.
respondent_m1 <- rstanarm::stan_glm(
  resp_cheated ~ gender + age.2 + educ_cat + internal_efficacy +
    resp_prop_correct_wo_cheating + twohoursweek,
  data = ger_respondent
)
summary(respondent_m1, digits = 3)

# Panel b. Proportion of cheating (lm())
# Same right-hand side as panel a, with the proportion of items cheated on as
# the outcome.
respondent_m2_lm <- lm(
  resp_prop_cheat ~ gender + age.2 + educ_cat + internal_efficacy +
    resp_prop_correct_wo_cheating + twohoursweek,
  data = ger_respondent
)
summary(respondent_m2_lm)

#coefficient plots
# Labels are attached by position; keep them in the order of the non-intercept
# terms of the two respondent models.
label_list <- c("Gender: Female", "Age", "Education: Intermediate", 
                "Education: High", "Internal efficacy","Knowledge score",
                "Habitual surveytaker")

# plot for panel a
# Bounds at conf.level = 0.8 (kept as its own object as before).
stan_m1_ci_8 <- broom::tidy(respondent_m1, conf.int = TRUE, conf.level = 0.8) %>% 
  dplyr::filter(term != "(Intercept)") %>%
  dplyr::mutate(label = factor(label_list, levels = rev(label_list)))

# Default-level bounds, with the 80% bounds stored alongside as columns so the
# plot maps every aesthetic from a single data frame instead of reaching into
# a second one with `$` inside aes().
stan_m1_ci <- broom::tidy(respondent_m1, conf.int = TRUE) %>% 
  dplyr::filter(term != "(Intercept)") %>%
  dplyr::mutate(label = factor(label_list, levels = rev(label_list)),
                conf.low.8 = stan_m1_ci_8$conf.low,
                conf.high.8 = stan_m1_ci_8$conf.high)

p_m1_ci <- ggplot(stan_m1_ci, aes(x = estimate, y= label)) +
  geom_point(size = 2) +
  scale_x_continuous(limits = c(-.75, .75)) +
  geom_text(aes(label = round(estimate,2)), position = position_nudge(y = 0.3), size = 3) +
  geom_vline(aes(xintercept = 0), linetype = "longdash", color = "black", size = 0.2) +
  geom_segment(aes(y = label, yend = label, x = conf.low, xend = conf.high), size = 0.4) +
  geom_segment(aes(y = label, yend = label, x = conf.low.8, xend = conf.high.8), size = 0.8) +
  ggtitle("Cheated (yes/no)") +  
  labs(y ="",
       x= "Log odds")  + theme_sjplot() +
  theme(plot.margin = unit(c(0.2,0.2,0,0), "cm")) #t,r,b,l

p_m1_ci

# plot for panel b
# Model data of type "std2" for the lm() model: once with the default interval
# level and once with ci.lvl = 0.8.
m2_std_default <- get_model_data(respondent_m2_lm, type = "std2",
                                 rm.terms = c("log-fit_ratio", "R2"))
m2_std_80 <- get_model_data(respondent_m2_lm, type = "std2",
                            rm.terms = c("log-fit_ratio", "R2"), ci.lvl = 0.8)

# One plotting frame: formatted default interval, 80% bounds, and the display
# labels (assigned by position) replacing the raw term names.
m2_plot_df <- dplyr::mutate(
  m2_std_default,
  conf_int = paste0("(", round(conf.low, 3), ";", round(conf.high, 3), ")"),
  conf.low.8 = m2_std_80$conf.low,
  conf.high.8 = m2_std_80$conf.high,
  term = factor(label_list, levels = rev(label_list))
)

# The y-axis text is blanked because this panel sits to the right of panel a,
# which carries the same labels.
p_m2_ci <- ggplot(m2_plot_df, aes(x = estimate, y = term)) +
  geom_point(size = 2) +
  geom_text(aes(label = round(estimate, 2)),
            position = position_nudge(y = 0.3), size = 3) +
  geom_segment(aes(y = term, yend = term, x = conf.low, xend = conf.high),
               size = 0.4) +
  geom_segment(aes(y = term, yend = term, x = conf.low.8, xend = conf.high.8),
               size = 0.8) +
  geom_vline(aes(xintercept = 0), linetype = "longdash", color = "black",
             size = 0.2) +
  scale_x_continuous(limits = c(-.75, .75)) +
  labs(y = "", x = "Standardized estimates") +
  ggtitle("Proportion of cheating (lm())") +
  theme_sjplot() +
  theme(
    plot.margin = unit(c(0.2, 0.2, 0, 0), "cm"), #t,r,b,l
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

p_m2_ci

# creating figure c4
# Panels a and b side by side in one row; panel a gets the wider column
# because it is the one that shows the y-axis labels.
pdf(file = "figures/fig-c4-respondent_stan_m1m2_habitual_ci.pdf",
    height = 4, width = 8.5, family = "Helvetica")
grid.arrange(
  p_m1_ci, p_m2_ci,
  ncol = 2,
  widths = c(1.17, .83),
  layout_matrix = rbind(c(1, 2))
)
dev.off()


## -------------------------------------------------------------
## Figure C5. Respondent, question, and timing on cheating -----
## -------------------------------------------------------------

# setting up and running stan_glmer
# Binomial mixed model for the cheat indicator with random intercepts for
# question and respondent (personid).
timing_m1_stan <- rstanarm::stan_glmer(
  as.numeric(cheat) ~ female + educ_cat + age.2 + n_items + ltimes +
    (1 | question) + (1 | personid),
  family = binomial,
  data = ger_time_model,
  sparse = TRUE,
  chains = 2,
  seed = 1234
)
summary(timing_m1_stan)

# coefficient plot
# Labels are attached by position; keep them in the order of the non-intercept
# terms of timing_m1_stan.
label_list <- c("Gender: Female","Education: Intermediate", 
                "Education: High",  "Age", "Number of items","log(time(seconds))")

# Bounds at conf.level = 0.8 (kept as its own object as before).
timing_m1_ci.8 <- broom::tidy(timing_m1_stan, conf.int = TRUE, conf.level = 0.8) %>% 
  dplyr::filter(term != "(Intercept)") %>%
  dplyr::mutate(label = factor(label_list, levels = rev(label_list)))

# Default-level bounds plus the 80% bounds as columns, so that aes() below
# only refers to columns of the plotted data frame.
timing_m1_ci <- broom::tidy(timing_m1_stan, conf.int = TRUE) %>% 
  dplyr::filter(term != "(Intercept)") %>%
  dplyr::mutate(conf_int = paste0("(",round(conf.low,3),";",round(conf.high,3), ")"),
                label = factor(label_list, levels = rev(label_list)),
                conf.low.8 = timing_m1_ci.8$conf.low,
                conf.high.8 = timing_m1_ci.8$conf.high)

pdf(file="figures/fig-c5-timing_stan_m1_ci.pdf", height=3.5, width=5, family="Helvetica")
# print() explicitly so the figure is also drawn when the script is run via
# source(), where bare top-level expressions are not auto-printed.
print(
  ggplot(timing_m1_ci, aes(x = estimate, y= label)) +
    geom_point(size = 2) +
    geom_text(aes(label = round(estimate,2)), position = position_nudge(y = 0.3), size = 3) +
    geom_vline(aes(xintercept = 0), linetype = "longdash", color = "black", size = 0.2) +
    geom_segment(aes(y = label, yend = label, x = conf.low, xend = conf.high), size = 0.4) +
    geom_segment(aes(y = label, yend = label, x = conf.low.8, xend = conf.high.8), size = 0.8) +
    ggtitle("") +  
    labs(y ="",
         x= "Log odds")  + theme_sjplot() +
    theme(plot.margin = unit(c(0.2,0.2,0,0), "cm")) #t,r,b,l
)
dev.off()

## -------------------------------------------------------------
## Figure C6. Density plots of response times by cheating st ---
## -------------------------------------------------------------

# Number of observations and mean/median of raw and logged response times,
# separately for each value of cheat.
time_sum <- ger_time_model %>% group_by(cheat) %>% summarize(n_obs = n(),
                                                             mean_time = mean(times, na.rm = TRUE),
                                                             median_time = median(times, na.rm = TRUE),
                                                             mean_ltime = mean(ltimes, na.rm = TRUE),
                                                             median_ltime = median(ltimes, na.rm = TRUE))

# density plot of time cheater vs. no cheater
ger_time_model$cheatlab <- ifelse(ger_time_model$cheat, "Yes", "No")

# Group medians, looked up once and reused for labels, segments and text.
median_time_no <- time_sum$median_time[time_sum$cheat == FALSE]
median_time_yes <- time_sum$median_time[time_sum$cheat == TRUE]

# Plotmath strings (rendered with parse = TRUE below).
median_nocheater_lab <- paste0("Med ^ {no} == ", round(median_time_no, 0), "~s")
median_cheater_lab <- paste0("Med ^ {yes} == ", round(median_time_yes, 0), "~s")

# rgb() reads alpha on the same 0..maxColorValue scale as the colour channels,
# so alpha = .5 with maxColorValue = 255 meant "almost fully transparent".
# Scale it to express 50% opacity. NOTE(review): the density layer below sets
# its own alpha = 0.5, which is expected to take precedence, so the rendered
# figure should be unchanged -- confirm.
cols <- c(rgb(44, 105, 169, alpha = .5 * 255, maxColorValue = 255),
          rgb(219, 0, 23, alpha = .5 * 255, maxColorValue = 255))

pdf(file="figures/fig-c6-time-distribution.pdf", height=4, width=7, family="Helvetica")
theme_set(theme_sjplot())
# print() explicitly so the figure is also drawn when the script is run via
# source(), where bare top-level expressions are not auto-printed.
print(
  ggplot() + 
    geom_density(data = ger_time_model, aes(x = times, group = cheatlab, fill = cheatlab), alpha = 0.5, adjust = 2) + 
    # vertical markers at the two group medians
    annotate('segment', x = median_time_no, xend = median_time_no, y = 0, yend = .475) + 
    annotate('segment', x = median_time_yes, xend = median_time_yes, y = 0, yend = .475) + 
    annotate('text', x = median_time_no, y = .49, 
             label = median_nocheater_lab, parse = TRUE, size = 3)  + 
    annotate('text', x = median_time_yes, y = .49, 
             label = median_cheater_lab, parse = TRUE, size = 3)  + 
    annotate('text', x = 12, y = .15, 
             label = "Not cheated", size = 4, fontface = "bold")  +   
    annotate('text', x = 200, y = .15, 
             label = "Cheated", size = 4, fontface = "bold")  +   
    xlab("Response time (seconds)") +
    ylab("Density") + 
    scale_x_continuous(expand = c(0,0), trans = 'log', breaks = c(1, 5, 10, 20, 30, 60, 100, 200, 1000, 2500)) + 
    scale_y_continuous(limits = c(0, .5), expand = c(0,0)) + 
    scale_fill_manual(values=cols) + 
    theme(legend.position = "none")
)
dev.off()

## -------------------------------------------------------------
## Figure C7. Cheating instances by item (%) -------------------
## -------------------------------------------------------------

# Horizontal bar chart of ques_prop_cheat per item, items sorted by that value
# and bars coloured by ques_type_diff.
cheating_instances_item_plot <- ger_question %>%
  ggplot(aes(x = reorder(item_label, ques_prop_cheat), y = ques_prop_cheat)) +
  geom_col(aes(fill = ques_type_diff)) +
  coord_flip() +
  scale_fill_manual(values = c("#e66101", "#fdb863", "#b2abd2", "#5e3c99")) +
  scale_y_continuous(
    limits = c(0, 0.15),
    labels = scales::percent_format(accuracy = 1)
  ) +
  labs(
    x = "",
    y = "Percentage of cheating instances",
    fill = ""
  ) +
  theme_minimal(base_family = "Helvetica") +
  theme(legend.position = "bottom")

ggsave(
  "figures/fig-c7-ger_item_distribution.png",
  cheating_instances_item_plot,
  width = 25, height = 30, units = "cm"
)


## -------------------------------------------------------------
## Figure C8. Diff-in-coef tests (Distribution of Z-scores) ----
## -------------------------------------------------------------

#define dependent variables
# Knowledge scores as measured (every resp_prop_correct* column that is not a
# "wo_cheating" variant) ...
depvar_naive <- names(
  dplyr::select(ger_respondent,
                contains("resp_prop_correct"), -contains("wo_cheating"))
)
# ... and their "wo_cheating" counterparts.
depvar_wo_cheating <- names(
  dplyr::select(ger_respondent,
                contains("resp_prop_correct") & contains("wo_cheating"))
)

#covars
respondent_covars <- c("female", "educ.2", "age.2", "polinterest.1", "internal_efficacy")

# running models ----------------

# One model_permute() call per dependent variable, always with the same
# covariates and data. The naive set is run before the corrected set, as
# before.
run_permute <- function(outcome) {
  model_permute(depvar = outcome, covars = respondent_covars, data = ger_respondent)
}

naive_list <- lapply(depvar_naive, run_permute)

corrected_list <- lapply(depvar_wo_cheating, run_permute)

# test and df extraction ----------------
# NOTE(review): the second argument is a position (1-6); it presumably indexes
# the model lists built above, i.e. it assumes the resp_prop_correct* columns
# appear in ger_respondent in exactly this order -- confirm against
# data_from_permute() and the data.

#composite
comp_df <- data_from_permute(measure = "Composite", 1)

#factual
fact_df <- data_from_permute(measure = "Factual", 2)

#elite-text
et_df <- data_from_permute(measure = "Elite - Text", 3)

#elite-pictures
ep_df <- data_from_permute(measure = "Elite - Pictures", 4)

#event
event_df <- data_from_permute(measure = "Event", 5)

#scaled
scaled_df <- data_from_permute(measure = "Composite (Scaled)", 6)

#merge dfs
# Stack all measures, translate coefficient names into display labels (terms
# not listed become NA) and add the mean z per index x covariate cell.
merged_df <- dplyr::bind_rows(comp_df, scaled_df, fact_df, et_df, ep_df, event_df) %>%
  dplyr::mutate(var_verb = case_when(var == "age.2" ~ "Age",
                                     var == "educ.2" ~ "Education",
                                     var == "femaleTRUE" ~ "Female",
                                     var == "polinterest.1" ~ "Political Interest",
                                     var == "internal_efficacy" ~ "Internal Efficacy"
  )) %>%
  dplyr::group_by(index,var_verb) %>%
  dplyr::mutate(avg = mean(z)) %>%
  # drop the grouping again so later operations on merged_df are not silently
  # performed per group
  dplyr::ungroup()

#plot -------

#density
# Density of z per index (rows) and covariate (columns). The shaded bands mark
# |z| > 1.96, the grey dashed line marks 0 and the red dashed line the cell
# mean (avg).
z_score_density <- ggplot(merged_df, aes(x = z)) +
  scale_x_continuous(limits = c(-2.5,2.5)) +
  geom_rect(data=NULL,aes(xmin=-Inf,xmax=-1.96,ymin=0,ymax=Inf),
            fill="#ffcccb", alpha = 0.1) +
  geom_rect(data=NULL,aes(xmin=1.96,xmax=Inf,ymin=0,ymax=Inf),
            fill="#ffcccb", alpha = 0.1) +
  geom_density() +
  geom_vline(xintercept = 0, linetype = "longdash", color = "#b3b3b3") +
  geom_vline(aes(xintercept = avg) , linetype = "longdash", color = "red") +
  facet_grid(index~var_verb) +
  theme_bw() +
  theme(strip.background = element_rect(fill = "white"),
        strip.text = element_text(colour = 'navy', face = "bold"),
        text = element_text(family = "Helvetica")) +
  labs(x = "Z-score",
       y = "Density")

z_score_density

# Name both arguments in full: the previous call passed the plot positionally
# and the path as `file =`, which only worked through partial matching of
# `file` to ggsave()'s `filename` argument.
ggsave(filename = "figures/fig-c8-z_distribution_dens_red.png", plot = z_score_density,
       width = 3, height = 4, units = "cm", dpi = 300, scale = 5)

## -------------------------------------------------------------
## Figure C9. Parallel navigation (Top 25 domains) -------------
## -------------------------------------------------------------

# Panel a. Germany
# Horizontal bars of `number` per domain (sorted by `number`) for the rows
# with country == "GER".
ger_parallel_user_top <- domains_user_df %>%
  dplyr::filter(country == "GER") %>%
  ggplot(aes(x = reorder(domain, number), y = number)) +
  geom_col() +
  coord_flip() +
  labs(y = "Number of unique users (domain)") +
  theme_minimal() +
  theme(axis.title.y = element_blank())

ger_parallel_user_top

ggsave("figures/fig-c9a-parallel_top_users_ger.pdf", ger_parallel_user_top)

# Panel b. USA
# Horizontal bars of `number` per domain (sorted by `number`) for the rows
# with country == "USA".
us_parallel_user_top <- domains_user_df %>%
  dplyr::filter(country == "USA") %>%
  ggplot(aes(x = reorder(domain, number), y = number)) +
  geom_col() +
  coord_flip() +
  labs(y = "Number of unique users (domain)") +
  theme_minimal() +
  theme(axis.title.y = element_blank())

us_parallel_user_top

ggsave("figures/fig-c9b-parallel_top_users_us.pdf", us_parallel_user_top)

## -------------------------------------------------------------
## Figure C10. Average time spent per week on survey-taking ----
## -------------------------------------------------------------
# Histogram of duration_per_week with bar heights expressed as shares of all
# observations; the dashed vertical line marks 120 minutes.
duration_plot <- ggplot(ger_dat_response, aes(x = duration_per_week)) +
  geom_histogram(aes(y = (..count..) / sum(..count..)), bins = 100) +
  geom_vline(aes(xintercept = 120), linetype = "dashed", color = "#cc0055") +
  scale_x_continuous(breaks = seq(0, 900, 100)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(
    x = "Minutes per week",
    y = "Percentage of respondents\n"
  ) +
  theme_minimal(base_family = "Helvetica")

duration_plot

ggsave(
  "figures/fig-c10-ger_platforms_time_distribution.png",
  duration_plot,
  width = 18, height = 9, units = "cm"
)

## -------------------------------------------------------------
## Tables C1-C4. Cheating interaction models  (prediction) -----
## -------------------------------------------------------------
## models
# Knowledge measures (every resp_prop_correct* column except the
# "wo_cheating" variants), the four outcomes, and the covariates used in the
# interaction models. Note that respondent_covars is redefined here with a
# shorter covariate set than in the Figure C8 section.
iv_list <- ger_respondent %>% dplyr::select(dplyr::starts_with("resp_prop_correct"), -dplyr::contains("wo_cheating")) %>% names()
dv_list <- c("polinterest.1", "votecertainty.1", "internal_efficacy", "likelihoodvote.1")
respondent_covars <- c("female", "educ.2", "age.2")

# Copy of the respondent data with the four outcome columns passed through
# scale().
ger_respondent_model <- ger_respondent %>%
  dplyr::mutate_at(dv_list, scale)

# For every outcome, one formula string per knowledge measure of the form
# "<outcome> ~ <measure>*resp_cheated +<covariates>". Built with vectorised
# paste0() instead of the deprecated purrr::cross2(); the resulting strings
# are identical to the ones produced before.
covar_rhs <- paste0(respondent_covars, collapse = " + ")
empty_list <- list()
for (i in dv_list) {
  empty_list[[i]] <- paste0(i, " ~ ", iv_list, "*resp_cheated +", covar_rhs)
}


# table output
# Lookup from raw coefficient names to the labels shown in the tables. All six
# knowledge measures share one label, as do their interactions with
# resp_cheatedTRUE, so every table shows the same row labels.
knowledge_terms <- c("resp_prop_correct",
                     "resp_prop_correct_scaled",
                     "resp_prop_correct_elite_pic",
                     "resp_prop_correct_elite_text",
                     "resp_prop_correct_event",
                     "resp_prop_correct_factual")

names_for_coefs <- c(
  setNames(rep("Political knowledge", length(knowledge_terms)),
           knowledge_terms),
  "resp_cheatedTRUE" = "Cheated",
  "femaleTRUE" = "Female",
  "educ.2" = "Education",
  "age.2" = "Age",
  setNames(rep("Political knowledge x Cheated", length(knowledge_terms)),
           paste0(knowledge_terms, ":resp_cheatedTRUE"))
)

# Fit, for one outcome, the six interaction models
#   <outcome> ~ <knowledge measure>*resp_cheated + female + educ.2 + age.2
# (one per knowledge measure) and return them as a named list whose names are
# the column headers used in the tables. Replaces 24 copy-pasted lm() calls
# that differed only in the outcome and the knowledge measure.
#
# outcome: name of the outcome column, as a string.
# data:    data frame to fit on; defaults to ger_respondent_model.
fit_cheating_interactions <- function(outcome, data = ger_respondent_model) {
  knowledge_measures <- c(
    "Composite" = "resp_prop_correct",
    "Composite\n(Scaled)" = "resp_prop_correct_scaled",
    "Elite - Pictures" = "resp_prop_correct_elite_pic",
    "Elite - Text" = "resp_prop_correct_elite_text",
    "Event" = "resp_prop_correct_event",
    "Factual" = "resp_prop_correct_factual"
  )
  # lapply() keeps the names of knowledge_measures, so the result is a named
  # list in the same order as before.
  lapply(knowledge_measures, function(measure) {
    model_formula <- as.formula(
      paste0(outcome, " ~ ", measure, "*resp_cheated + female + educ.2 + age.2")
    )
    lm(model_formula, data = data)
  })
}

#political interest
political_interest <- fit_cheating_interactions("polinterest.1")

#vote certainty
vote_certainty <- fit_cheating_interactions("votecertainty.1")

#internal efficacy
# (the list deliberately keeps its original name, which is also a column name;
# inside lm() the column of `data` is the one that is used)
internal_efficacy <- fit_cheating_interactions("internal_efficacy")

#likelihood to vote
likelihood_vote <- fit_cheating_interactions("likelihoodvote.1")

# Render one list of interaction models as a LaTeX table and write it to
# `path`. Coefficients are relabelled via names_for_coefs, the listed
# goodness-of-fit rows are dropped, and rows 13-14 are set in bold
# (presumably the interaction term and its standard error -- verify if the
# model specification changes).
save_interaction_table <- function(models, title, path) {
  modelsummary::modelsummary(
    models,
    title = title,
    coef_rename = names_for_coefs,
    gof_omit = "DF|Deviance|AIC|BIC|Log.Lik",
    output = "latex"
  ) %>%
    kableExtra::row_spec(13:14, bold = TRUE) %>%
    kableExtra::save_kable(path)
}

# Table C1. Political Interest
save_interaction_table(
  political_interest,
  title = "Political interest and cheating interaction model. Rows correspond to different measures of political knowledge, columns to the outcomes predicted",
  path = "figures/tab-c1-pred_ex_polint.tex"
)

# Table C2. Vote certainty
save_interaction_table(
  vote_certainty,
  title = " Vote certainty and cheating interaction model. Rows correspond to different measures of political knowledge, columns to the outcomes predicted",
  path = "figures/tab-c2-pred_ex_vote_certainty.tex"
)

# Table C3. Internal efficacy
save_interaction_table(
  internal_efficacy,
  title = "Internal efficacy and cheating interaction model. Rows correspond to different measures of political knowledge, columns to the outcomes predicted",
  path = "figures/tab-c3-pred_ex_internal_efficacy.tex"
)

# Table C4. Likelihood to vote
save_interaction_table(
  likelihood_vote,
  title = "Likelihood to vote and cheating interaction model. Rows correspond to different measures of political knowledge, columns to the outcomes predicted",
  path = "figures/tab-c4-pred_ex_likelihood_vote.tex"
)

# End of script: this closes every open user connection, not only the log
# file (my_log) that output and messages were sink()-ed to at the top.
closeAllConnections() # Close connection to log file